Input data
space_missions <- read_csv("space_missions.csv")
## Rows: 4630 Columns: 9── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): Company, Location, Rocket, Mission, RocketStatus, MissionStatus
## date (1): Date
## time (1): Time
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#View(space_missions)
space_missions_data_dictionary <- read_csv("space_missions_data_dictionary.csv")
## Rows: 9 Columns: 2── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Field, Description
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#View(space_missions_data_dictionary)
Data Wrangling
We create a new dataframe with 2 columns: 1 for months and the other for the number of space missions in that month.
#group data by month and sum sales
sp <- space_missions %>%
group_by(Date) %>%
summarise(total_count=n(),
.groups = 'drop') %>%
as.data.frame()
sp <- sp %>%
group_by(month = lubridate::floor_date(Date, 'month')) %>%
summarize(sum = sum(total_count))
new_data <- pad(sp)
## pad applied on the interval: month
new_data[is.na(new_data)] <- 0
sp <- new_data
spSum = sp$'sum'
#View(sp)
summary(sp)
## month sum
## Min. :1957-10-01 Min. : 0.000
## 1st Qu.:1973-12-08 1st Qu.: 4.000
## Median :1990-02-15 Median : 5.000
## Mean :1990-02-14 Mean : 5.951
## 3rd Qu.:2006-04-23 3rd Qu.: 8.000
## Max. :2022-07-01 Max. :24.000
times <- ts(spSum)
plot.ts(times, ylab = 'Number of space missions')
#tsplot(spSum, ylab = 'Number of space missions')
Non Parametric Trend
Smooth to with frequency = 1
SOI = ts(spSum, freq=1)
tsplot(SOI, col=8, ylab = 'Number of space missions') # the time scale matters (not shown)
lines(ksmooth(time(SOI), SOI, "normal", bandwidth=12), lwd=2, col=4)
Smooth with frequency = 4
SOI = ts(spSum, freq=4)
tsplot(SOI, col=8, ylab = 'Number of space missions') # the time scale matters (not shown)
lines(ksmooth(time(SOI), SOI, "normal", bandwidth=12), lwd=2, col=4)
Smooth with frequency = 12
SOI = ts(spSum, freq=12)
tsplot(SOI, col=8, ylab = 'Number of space missions') # the time scale matters (not shown)
lines(ksmooth(time(SOI), SOI, "normal", bandwidth=12), lwd=2, col=4)
#Lowess
trend(spSum, lowess = TRUE, main='Space missions')
Fitting SARIMA Model
Now we check the detrended and differenced time series to see which one is stationary
par(mfrow=2:1) # plot transformed data
tsplot(detrend(spSum), main="detrended" )
tsplot(diff(spSum), main="differenced" )
Differenced time series looks stationary.
tsplot(diff(sp$sum))
adf.test(diff(sp$sum))
## Warning in adf.test(diff(sp$sum)): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: diff(sp$sum)
## Dickey-Fuller = -13.651, Lag order = 9, p-value = 0.01
## alternative hypothesis: stationary
pp.test(diff(sp$sum))
## Warning in pp.test(diff(sp$sum)): p-value smaller than printed p-value
##
## Phillips-Perron Unit Root Test
##
## data: diff(sp$sum)
## Dickey-Fuller Z(alpha) = -987.42, Truncation lag parameter = 6, p-value
## = 0.01
## alternative hypothesis: stationary
kpss.test(diff(sp$sum))
## Warning in kpss.test(diff(sp$sum)): p-value greater than printed p-value
##
## KPSS Test for Level Stationarity
##
## data: diff(sp$sum)
## KPSS Level = 0.03402, Truncation lag parameter = 6, p-value = 0.1
summary(ur.ers(diff(sp$sum)))
##
## ###############################################
## # Elliot, Rothenberg and Stock Unit Root Test #
## ###############################################
##
## Test of type DF-GLS
## detrending of series with intercept
##
##
## Call:
## lm(formula = dfgls.form, data = data.dfgls)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.8480 -1.6011 -0.1291 1.4235 11.3457
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## yd.lag -3.81552 0.16641 -22.928 < 2e-16 ***
## yd.diff.lag1 1.90757 0.14565 13.097 < 2e-16 ***
## yd.diff.lag2 1.19209 0.11245 10.601 < 2e-16 ***
## yd.diff.lag3 0.62419 0.07414 8.420 < 2e-16 ***
## yd.diff.lag4 0.22662 0.03523 6.433 2.2e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.572 on 767 degrees of freedom
## Multiple R-squared: 0.8281, Adjusted R-squared: 0.8269
## F-statistic: 738.8 on 5 and 767 DF, p-value: < 2.2e-16
##
##
## Value of test-statistic is: -22.9284
##
## Critical values of DF-GLS are:
## 1pct 5pct 10pct
## critical values -2.57 -1.94 -1.62
After 4 tests, I am confident that the differenced time series is stationary.
mod1 <- Arima(sp$sum, order = c(0,1,0))
plot(mod1$residuals)
acf(mod1$residuals)
pacf(mod1$residuals)
mod1
## Series: sp$sum
## ARIMA(0,1,0)
##
## sigma^2 = 12.29: log likelihood = -2077.04
## AIC=4156.09 AICc=4156.09 BIC=4160.74
mod1 <- Arima(sp$sum, order = c(0,1,1), seasonal = list(order=c(2,1,2),period=12))
plot(mod1$residuals)
acf(mod1$residuals)
pacf(mod1$residuals)
adf.test(mod1$residuals)
##
## Augmented Dickey-Fuller Test
##
## data: mod1$residuals
## Dickey-Fuller = -8.0686, Lag order = 9, p-value = 0.01
## alternative hypothesis: stationary
pp.test(mod1$residuals)
##
## Phillips-Perron Unit Root Test
##
## data: mod1$residuals
## Dickey-Fuller Z(alpha) = -829.62, Truncation lag parameter = 6, p-value
## = 0.01
## alternative hypothesis: stationary
kpss.test(mod1$residuals)
##
## KPSS Test for Level Stationarity
##
## data: mod1$residuals
## KPSS Level = 0.62403, Truncation lag parameter = 6, p-value = 0.02045
summary(ur.ers(mod1$residuals))
##
## ###############################################
## # Elliot, Rothenberg and Stock Unit Root Test #
## ###############################################
##
## Test of type DF-GLS
## detrending of series with intercept
##
##
## Call:
## lm(formula = dfgls.form, data = data.dfgls)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.5673 -1.6319 -0.1215 1.3951 8.8777
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## yd.lag -1.0811953 0.0863710 -12.518 <2e-16 ***
## yd.diff.lag1 0.0057388 0.0765606 0.075 0.940
## yd.diff.lag2 0.0115913 0.0657598 0.176 0.860
## yd.diff.lag3 -0.0001271 0.0531399 -0.002 0.998
## yd.diff.lag4 -0.0257802 0.0361540 -0.713 0.476
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.369 on 768 degrees of freedom
## Multiple R-squared: 0.5384, Adjusted R-squared: 0.5354
## F-statistic: 179.1 on 5 and 768 DF, p-value: < 2.2e-16
##
##
## Value of test-statistic is: -12.518
##
## Critical values of DF-GLS are:
## 1pct 5pct 10pct
## critical values -2.57 -1.94 -1.62
mod1
## Series: sp$sum
## ARIMA(0,1,1)(2,1,2)[12]
##
## Coefficients:
## ma1 sar1 sar2 sma1 sma2
## -0.8909 0.6934 0.0489 -1.6014 0.6014
## s.e. 0.0157 0.1567 0.0474 0.1541 0.1515
##
## sigma^2 = 5.713: log likelihood = -1771.13
## AIC=3554.25 AICc=3554.36 BIC=3582.09
mod1 <- Arima(sp$sum, order = c(1,2,1), seasonal = list(order=c(2,1,2),period=12))
plot(mod1$residuals)
acf(mod1$residuals)
pacf(mod1$residuals)
adf.test(mod1$residuals)
##
## Augmented Dickey-Fuller Test
##
## data: mod1$residuals
## Dickey-Fuller = -13.542, Lag order = 9, p-value = 0.01
## alternative hypothesis: stationary
pp.test(mod1$residuals)
##
## Phillips-Perron Unit Root Test
##
## data: mod1$residuals
## Dickey-Fuller Z(alpha) = -641.38, Truncation lag parameter = 6, p-value
## = 0.01
## alternative hypothesis: stationary
kpss.test(mod1$residuals)
##
## KPSS Test for Level Stationarity
##
## data: mod1$residuals
## KPSS Level = 0.040132, Truncation lag parameter = 6, p-value = 0.1
summary(ur.ers(mod1$residuals))
##
## ###############################################
## # Elliot, Rothenberg and Stock Unit Root Test #
## ###############################################
##
## Test of type DF-GLS
## detrending of series with intercept
##
##
## Call:
## lm(formula = dfgls.form, data = data.dfgls)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.0281 -1.4658 -0.0432 1.4804 8.9081
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## yd.lag -2.70289 0.12810 -21.100 < 2e-16 ***
## yd.diff.lag1 1.31861 0.10863 12.139 < 2e-16 ***
## yd.diff.lag2 0.78230 0.08671 9.022 < 2e-16 ***
## yd.diff.lag3 0.46513 0.05913 7.866 1.24e-14 ***
## yd.diff.lag4 0.16828 0.03565 4.721 2.80e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.412 on 768 degrees of freedom
## Multiple R-squared: 0.682, Adjusted R-squared: 0.6799
## F-statistic: 329.4 on 5 and 768 DF, p-value: < 2.2e-16
##
##
## Value of test-statistic is: -21.0997
##
## Critical values of DF-GLS are:
## 1pct 5pct 10pct
## critical values -2.57 -1.94 -1.62
mod1
## Series: sp$sum
## ARIMA(1,2,1)(2,1,2)[12]
##
## Coefficients:
## ar1 ma1 sar1 sar2 sma1 sma2
## -0.5385 -1.0000 0.6880 0.0507 -1.5875 0.5877
## s.e. 0.0306 0.0051 0.1318 0.0454 0.1297 0.1268
##
## sigma^2 = 7.848: log likelihood = -1897.51
## AIC=3809.02 AICc=3809.16 BIC=3841.49
mod1 <- Arima(sp$sum, order = c(2,2,1), seasonal = list(order=c(2,2,2),period=12))
plot(mod1$residuals)
acf(mod1$residuals)
pacf(mod1$residuals)
adf.test(mod1$residuals)
##
## Augmented Dickey-Fuller Test
##
## data: mod1$residuals
## Dickey-Fuller = -12.699, Lag order = 9, p-value = 0.01
## alternative hypothesis: stationary
pp.test(mod1$residuals)
##
## Phillips-Perron Unit Root Test
##
## data: mod1$residuals
## Dickey-Fuller Z(alpha) = -583.64, Truncation lag parameter = 6, p-value
## = 0.01
## alternative hypothesis: stationary
kpss.test(mod1$residuals)
##
## KPSS Test for Level Stationarity
##
## data: mod1$residuals
## KPSS Level = 0.023553, Truncation lag parameter = 6, p-value = 0.1
summary(ur.ers(mod1$residuals))
##
## ###############################################
## # Elliot, Rothenberg and Stock Unit Root Test #
## ###############################################
##
## Test of type DF-GLS
## detrending of series with intercept
##
##
## Call:
## lm(formula = dfgls.form, data = data.dfgls)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.4665 -1.6537 -0.0297 1.3126 8.1187
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## yd.lag -2.18923 0.11103 -19.718 < 2e-16 ***
## yd.diff.lag1 0.97241 0.09344 10.406 < 2e-16 ***
## yd.diff.lag2 0.67920 0.07379 9.204 < 2e-16 ***
## yd.diff.lag3 0.32969 0.05542 5.949 4.10e-09 ***
## yd.diff.lag4 0.14768 0.03577 4.128 4.05e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.465 on 768 degrees of freedom
## Multiple R-squared: 0.6116, Adjusted R-squared: 0.6091
## F-statistic: 241.9 on 5 and 768 DF, p-value: < 2.2e-16
##
##
## Value of test-statistic is: -19.7183
##
## Critical values of DF-GLS are:
## 1pct 5pct 10pct
## critical values -2.57 -1.94 -1.62
mod1
## Series: sp$sum
## ARIMA(2,2,1)(2,2,2)[12]
##
## Coefficients:
## ar1 ar2 ma1 sar1 sar2 sma1 sma2
## -0.7273 -0.3414 -1.0000 -0.0066 0.0669 -1.8320 0.8320
## s.e. 0.0350 0.0344 0.0058 0.0715 0.0608 0.0784 0.0769
##
## sigma^2 = 7.462: log likelihood = -1880.44
## AIC=3776.89 AICc=3777.08 BIC=3813.87
auto.arima(sp$sum)
## Series: sp$sum
## ARIMA(1,1,2)
##
## Coefficients:
## ar1 ma1 ma2
## 0.6598 -1.6388 0.6872
## s.e. 0.1459 0.1300 0.1138
##
## sigma^2 = 6.204: log likelihood = -1810.96
## AIC=3629.91 AICc=3629.96 BIC=3648.53
mod1 <- Arima(sp$sum, order = c(1,1,2))
plot(mod1$residuals)
acf(mod1$residuals)
pacf(mod1$residuals)
adf.test(mod1$residuals)
##
## Augmented Dickey-Fuller Test
##
## data: mod1$residuals
## Dickey-Fuller = -9.1982, Lag order = 9, p-value = 0.01
## alternative hypothesis: stationary
pp.test(mod1$residuals)
##
## Phillips-Perron Unit Root Test
##
## data: mod1$residuals
## Dickey-Fuller Z(alpha) = -790.61, Truncation lag parameter = 6, p-value
## = 0.01
## alternative hypothesis: stationary
kpss.test(mod1$residuals)
##
## KPSS Test for Level Stationarity
##
## data: mod1$residuals
## KPSS Level = 0.25578, Truncation lag parameter = 6, p-value = 0.1
summary(ur.ers(mod1$residuals))
##
## ###############################################
## # Elliot, Rothenberg and Stock Unit Root Test #
## ###############################################
##
## Test of type DF-GLS
## detrending of series with intercept
##
##
## Call:
## lm(formula = dfgls.form, data = data.dfgls)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.228 -1.546 -0.134 1.572 11.888
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## yd.lag -1.04341 0.08098 -12.885 <2e-16 ***
## yd.diff.lag1 0.02982 0.07213 0.413 0.679
## yd.diff.lag2 0.06909 0.06232 1.109 0.268
## yd.diff.lag3 0.03889 0.05147 0.756 0.450
## yd.diff.lag4 0.02444 0.03614 0.676 0.499
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.494 on 768 degrees of freedom
## Multiple R-squared: 0.5085, Adjusted R-squared: 0.5053
## F-statistic: 158.9 on 5 and 768 DF, p-value: < 2.2e-16
##
##
## Value of test-statistic is: -12.8852
##
## Critical values of DF-GLS are:
## 1pct 5pct 10pct
## critical values -2.57 -1.94 -1.62
mod1
## Series: sp$sum
## ARIMA(1,1,2)
##
## Coefficients:
## ar1 ma1 ma2
## 0.6598 -1.6388 0.6872
## s.e. 0.1459 0.1300 0.1138
##
## sigma^2 = 6.204: log likelihood = -1810.96
## AIC=3629.91 AICc=3629.96 BIC=3648.53
mod1 <- Arima(sp$sum, order = c(1,1,2), seasonal = list(order=c(0,1,2),period=12))
plot(mod1$residuals)
acf(mod1$residuals)
pacf(mod1$residuals)
adf.test(mod1$residuals)
##
## Augmented Dickey-Fuller Test
##
## data: mod1$residuals
## Dickey-Fuller = -9.1248, Lag order = 9, p-value = 0.01
## alternative hypothesis: stationary
pp.test(mod1$residuals)
##
## Phillips-Perron Unit Root Test
##
## data: mod1$residuals
## Dickey-Fuller Z(alpha) = -766.37, Truncation lag parameter = 6, p-value
## = 0.01
## alternative hypothesis: stationary
kpss.test(mod1$residuals)
##
## KPSS Test for Level Stationarity
##
## data: mod1$residuals
## KPSS Level = 0.56359, Truncation lag parameter = 6, p-value = 0.02734
summary(ur.ers(mod1$residuals))
##
## ###############################################
## # Elliot, Rothenberg and Stock Unit Root Test #
## ###############################################
##
## Test of type DF-GLS
## detrending of series with intercept
##
##
## Call:
## lm(formula = dfgls.form, data = data.dfgls)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.3117 -1.6484 -0.0677 1.4742 9.0462
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## yd.lag -1.02553 0.08131 -12.613 <2e-16 ***
## yd.diff.lag1 0.02312 0.07205 0.321 0.748
## yd.diff.lag2 0.04462 0.06225 0.717 0.474
## yd.diff.lag3 0.02216 0.05119 0.433 0.665
## yd.diff.lag4 -0.01698 0.03614 -0.470 0.639
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.408 on 768 degrees of freedom
## Multiple R-squared: 0.5022, Adjusted R-squared: 0.4989
## F-statistic: 154.9 on 5 and 768 DF, p-value: < 2.2e-16
##
##
## Value of test-statistic is: -12.6128
##
## Critical values of DF-GLS are:
## 1pct 5pct 10pct
## critical values -2.57 -1.94 -1.62
mod1
## Series: sp$sum
## ARIMA(1,1,2)(0,1,2)[12]
##
## Coefficients:
## ar1 ma1 ma2 sma1 sma2
## 0.6245 -1.5822 0.6327 -0.8965 -0.0343
## s.e. 0.1856 0.1714 0.1511 0.0389 0.0393
##
## sigma^2 = 5.875: log likelihood = -1773.01
## AIC=3558.02 AICc=3558.13 BIC=3585.86
mod1 <- Arima(sp$sum, order = c(1,1,2), seasonal = list(order=c(0,0,2),period=12))
plot(mod1$residuals)
acf(mod1$residuals)
pacf(mod1$residuals)
adf.test(mod1$residuals)
##
## Augmented Dickey-Fuller Test
##
## data: mod1$residuals
## Dickey-Fuller = -8.7327, Lag order = 9, p-value = 0.01
## alternative hypothesis: stationary
pp.test(mod1$residuals)
##
## Phillips-Perron Unit Root Test
##
## data: mod1$residuals
## Dickey-Fuller Z(alpha) = -779.52, Truncation lag parameter = 6, p-value
## = 0.01
## alternative hypothesis: stationary
kpss.test(mod1$residuals)
##
## KPSS Test for Level Stationarity
##
## data: mod1$residuals
## KPSS Level = 0.22541, Truncation lag parameter = 6, p-value = 0.1
summary(ur.ers(mod1$residuals))
##
## ###############################################
## # Elliot, Rothenberg and Stock Unit Root Test #
## ###############################################
##
## Test of type DF-GLS
## detrending of series with intercept
##
##
## Call:
## lm(formula = dfgls.form, data = data.dfgls)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.4226 -1.5635 -0.0787 1.5187 10.3534
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## yd.lag -1.05531 0.08111 -13.011 <2e-16 ***
## yd.diff.lag1 0.04599 0.07209 0.638 0.524
## yd.diff.lag2 0.07554 0.06231 1.212 0.226
## yd.diff.lag3 0.05228 0.05134 1.018 0.309
## yd.diff.lag4 0.02598 0.03614 0.719 0.472
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.448 on 768 degrees of freedom
## Multiple R-squared: 0.5055, Adjusted R-squared: 0.5023
## F-statistic: 157 on 5 and 768 DF, p-value: < 2.2e-16
##
##
## Value of test-statistic is: -13.0106
##
## Critical values of DF-GLS are:
## 1pct 5pct 10pct
## critical values -2.57 -1.94 -1.62
mod1
## Series: sp$sum
## ARIMA(1,1,2)(0,0,2)[12]
##
## Coefficients:
## ar1 ma1 ma2 sma1 sma2
## 0.4991 -1.4765 0.5416 0.1166 0.1410
## s.e. 0.2228 0.2102 0.1868 0.0374 0.0336
##
## sigma^2 = 5.986: log likelihood = -1796.28
## AIC=3604.57 AICc=3604.68 BIC=3632.5
mod1 <- Arima(sp$sum, order = c(1,1,2), seasonal = list(order=c(2,0,0),period=12))
plot(mod1$residuals)
acf(mod1$residuals)
pacf(mod1$residuals)
adf.test(mod1$residuals)
##
## Augmented Dickey-Fuller Test
##
## data: mod1$residuals
## Dickey-Fuller = -8.1907, Lag order = 9, p-value = 0.01
## alternative hypothesis: stationary
pp.test(mod1$residuals)
##
## Phillips-Perron Unit Root Test
##
## data: mod1$residuals
## Dickey-Fuller Z(alpha) = -752.08, Truncation lag parameter = 6, p-value
## = 0.01
## alternative hypothesis: stationary
kpss.test(mod1$residuals)
##
## KPSS Test for Level Stationarity
##
## data: mod1$residuals
## KPSS Level = 0.24364, Truncation lag parameter = 6, p-value = 0.1
summary(ur.ers(mod1$residuals))
##
## ###############################################
## # Elliot, Rothenberg and Stock Unit Root Test #
## ###############################################
##
## Test of type DF-GLS
## detrending of series with intercept
##
##
## Call:
## lm(formula = dfgls.form, data = data.dfgls)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.7608 -1.5063 -0.0552 1.5504 9.8797
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## yd.lag -1.08615 0.08297 -13.090 <2e-16 ***
## yd.diff.lag1 0.08320 0.07341 1.133 0.257
## yd.diff.lag2 0.07076 0.06298 1.123 0.262
## yd.diff.lag3 0.04235 0.05122 0.827 0.409
## yd.diff.lag4 0.01414 0.03616 0.391 0.696
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.438 on 768 degrees of freedom
## Multiple R-squared: 0.5012, Adjusted R-squared: 0.498
## F-statistic: 154.4 on 5 and 768 DF, p-value: < 2.2e-16
##
##
## Value of test-statistic is: -13.0904
##
## Critical values of DF-GLS are:
## 1pct 5pct 10pct
## critical values -2.57 -1.94 -1.62
mod1
## Series: sp$sum
## ARIMA(1,1,2)(2,0,0)[12]
##
## Coefficients:
## ar1 ma1 ma2 sar1 sar2
## -0.0794 -0.9048 0.0299 0.1406 0.166
## s.e. NaN 0.0198 NaN NaN NaN
##
## sigma^2 = 5.931: log likelihood = -1792.89
## AIC=3597.78 AICc=3597.89 BIC=3625.72
mod1 <- Arima(sp$sum, order = c(1,1,2), seasonal = list(order=c(1,0,1),period=12))
tsplot(mod1$residuals)
coeftest(mod1)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 0.452613 0.295211 1.5332 0.12523
## ma1 -1.426063 0.282277 -5.0520 4.372e-07 ***
## ma2 0.493554 0.252045 1.9582 0.05021 .
## sar1 0.902149 0.044113 20.4508 < 2.2e-16 ***
## sma1 -0.774775 0.064174 -12.0731 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
acf(mod1$residuals)
pacf(mod1$residuals)
mod1
## Series: sp$sum
## ARIMA(1,1,2)(1,0,1)[12]
##
## Coefficients:
## ar1 ma1 ma2 sar1 sma1
## 0.4526 -1.4261 0.4936 0.9021 -0.7748
## s.e. 0.2952 0.2823 0.2520 0.0441 0.0642
##
## sigma^2 = 5.809: log likelihood = -1785.51
## AIC=3583.01 AICc=3583.12 BIC=3610.95
coeftest(Arima(sp$sum, order = c(1,1,2), seasonal = list(order=c(1,0,1),period=12), include.drift = FALSE))
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 0.452613 0.295211 1.5332 0.12523
## ma1 -1.426063 0.282277 -5.0520 4.372e-07 ***
## ma2 0.493554 0.252045 1.9582 0.05021 .
## sar1 0.902149 0.044113 20.4508 < 2.2e-16 ***
## sma1 -0.774775 0.064174 -12.0731 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
My best model:
mod1 <- Arima(sp$sum, order = c(0,1,2), seasonal = list(order=c(1,0,1),period=12))
tsplot(mod1$residuals)
coeftest(mod1)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ma1 -0.979484 0.036188 -27.067 < 2e-16 ***
## ma2 0.092767 0.036081 2.571 0.01014 *
## sar1 0.894297 0.043488 20.564 < 2e-16 ***
## sma1 -0.759103 0.061293 -12.385 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
acf(mod1$residuals)
pacf(mod1$residuals)
mod1
## Series: sp$sum
## ARIMA(0,1,2)(1,0,1)[12]
##
## Coefficients:
## ma1 ma2 sar1 sma1
## -0.9795 0.0928 0.8943 -0.7591
## s.e. 0.0362 0.0361 0.0435 0.0613
##
## sigma^2 = 5.806: log likelihood = -1785.77
## AIC=3581.54 AICc=3581.61 BIC=3604.81
#stationary tests for residuals
adf.test(mod1$residuals)
## Warning in adf.test(mod1$residuals): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: mod1$residuals
## Dickey-Fuller = -8.3179, Lag order = 9, p-value = 0.01
## alternative hypothesis: stationary
pp.test(mod1$residuals)
## Warning in pp.test(mod1$residuals): p-value smaller than printed p-value
##
## Phillips-Perron Unit Root Test
##
## data: mod1$residuals
## Dickey-Fuller Z(alpha) = -753.87, Truncation lag parameter = 6, p-value
## = 0.01
## alternative hypothesis: stationary
kpss.test(mod1$residuals)
## Warning in kpss.test(mod1$residuals): p-value greater than printed p-value
##
## KPSS Test for Level Stationarity
##
## data: mod1$residuals
## KPSS Level = 0.25193, Truncation lag parameter = 6, p-value = 0.1
summary(ur.ers(mod1$residuals))
##
## ###############################################
## # Elliot, Rothenberg and Stock Unit Root Test #
## ###############################################
##
## Test of type DF-GLS
## detrending of series with intercept
##
##
## Call:
## lm(formula = dfgls.form, data = data.dfgls)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.4717 -1.5070 -0.0452 1.5846 9.1575
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## yd.lag -1.067953 0.082722 -12.910 <2e-16 ***
## yd.diff.lag1 0.065373 0.073109 0.894 0.372
## yd.diff.lag2 0.064206 0.062748 1.023 0.307
## yd.diff.lag3 0.030356 0.051216 0.593 0.554
## yd.diff.lag4 -0.003602 0.036158 -0.100 0.921
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.413 on 768 degrees of freedom
## Multiple R-squared: 0.5014, Adjusted R-squared: 0.4982
## F-statistic: 154.5 on 5 and 768 DF, p-value: < 2.2e-16
##
##
## Value of test-statistic is: -12.9101
##
## Critical values of DF-GLS are:
## 1pct 5pct 10pct
## critical values -2.57 -1.94 -1.62
Model utility tests:
checkresiduals(mod1)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,2)(1,0,1)[12]
## Q* = 8.8615, df = 6, p-value = 0.1815
##
## Model df: 4. Total lags used: 10
tsdiag(mod1) # looks good
mod11 = sarima(spSum,p=0,d=1,q=2,P=1,D=0,Q=1,S=12)
## initial value 1.258835
## iter 2 value 1.038084
## iter 3 value 0.957681
## iter 4 value 0.935442
## iter 5 value 0.914577
## iter 6 value 0.910710
## iter 7 value 0.907368
## iter 8 value 0.906012
## iter 9 value 0.905656
## iter 10 value 0.904506
## iter 11 value 0.904172
## iter 12 value 0.890556
## iter 13 value 0.889516
## iter 14 value 0.887656
## iter 15 value 0.885646
## iter 16 value 0.885252
## iter 17 value 0.884968
## iter 18 value 0.884651
## iter 19 value 0.884645
## iter 20 value 0.884645
## iter 21 value 0.884645
## iter 21 value 0.884645
## iter 21 value 0.884645
## final value 0.884645
## converged
## initial value 0.879230
## iter 2 value 0.878972
## iter 3 value 0.878927
## iter 4 value 0.878924
## iter 5 value 0.878843
## iter 6 value 0.878634
## iter 7 value 0.878456
## iter 8 value 0.878387
## iter 9 value 0.878366
## iter 10 value 0.878365
## iter 10 value 0.878365
## iter 10 value 0.878365
## final value 0.878365
## converged
densityplot(as.numeric(mod1$residuals)) # these look uniform
summary(mod1)
## Series: sp$sum
## ARIMA(0,1,2)(1,0,1)[12]
##
## Coefficients:
## ma1 ma2 sar1 sma1
## -0.9795 0.0928 0.8943 -0.7591
## s.e. 0.0362 0.0361 0.0435 0.0613
##
## sigma^2 = 5.806: log likelihood = -1785.77
## AIC=3581.54 AICc=3581.61 BIC=3604.81
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.09565936 2.40171 1.848773 -Inf Inf 0.6886369 -0.002329452
coeftest(mod1)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ma1 -0.979484 0.036188 -27.067 < 2e-16 ***
## ma2 0.092767 0.036081 2.571 0.01014 *
## sar1 0.894297 0.043488 20.564 < 2e-16 ***
## sma1 -0.759103 0.061293 -12.385 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
pred = forecast(mod1,h=60)
pred
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 779 13.71608 10.62823 16.80393 8.993615 18.43854
## 780 13.75000 10.66150 16.83850 9.026541 18.47346
## 781 13.82121 10.71296 16.92945 9.067550 18.57486
## 782 15.33060 12.20273 18.45847 10.546937 20.11427
## 783 17.87859 14.73122 21.02596 13.065107 22.69208
## 784 13.32389 10.15715 16.49064 8.480771 18.16702
## 785 14.08860 10.90259 17.27460 9.216014 18.96118
## 786 14.49659 11.29143 17.70174 9.594727 19.39845
## 787 14.14546 10.92127 17.36965 9.214493 19.07643
## 788 14.20046 10.95735 17.44356 9.240553 19.16036
## 789 14.95331 11.69140 18.21523 9.964642 19.94198
## 790 15.43023 12.14961 18.71085 10.412954 20.44750
## 791 14.68772 11.31857 18.05686 9.535050 19.84038
## 792 14.49282 11.10467 17.88098 9.311092 19.67456
## 793 14.55651 11.14516 17.96785 9.339307 19.77370
## 794 15.90635 12.47198 19.34073 10.653927 21.15878
## 795 18.18501 14.72776 21.64227 12.897596 23.47243
## 796 14.11176 10.63177 17.59175 8.789581 19.43394
## 797 14.79563 11.29306 18.29820 9.438914 20.15235
## 798 15.16050 11.63549 18.68550 9.769465 20.55153
## 799 14.84649 11.29918 18.39379 9.421355 20.27162
## 800 14.89567 11.32621 18.46513 9.436651 20.35468
## 801 15.56895 11.97747 19.16042 10.076253 21.06164
## 802 15.99545 12.38208 19.60881 10.469285 21.52161
## 803 15.33142 11.63684 19.02601 9.681042 20.98180
## 804 15.15713 11.44044 18.87382 9.472946 20.84132
## 805 15.21408 11.47151 18.95665 9.490313 20.93785
## 806 16.42125 12.65297 20.18952 10.658166 22.18433
## 807 18.45905 14.66524 22.25285 12.656921 24.26117
## 808 14.81635 10.99718 18.63551 8.975440 20.65726
## 809 15.42793 11.58357 19.27229 9.548495 21.30737
## 810 15.75423 11.88485 19.62361 9.836517 21.67194
## 811 15.47341 11.57916 19.36766 9.517666 21.42915
## 812 15.51739 11.59843 19.43635 9.523860 21.51093
## 813 16.11950 12.17599 20.06302 10.088417 22.15059
## 814 16.50092 12.53301 20.46884 10.432517 22.56933
## 815 15.90709 11.86453 19.94965 9.724524 22.08965
## 816 15.75122 11.68411 19.81833 9.531105 21.97133
## 817 15.80215 11.70713 19.89716 9.539365 22.06493
## 818 16.88171 12.75899 21.00444 10.576547 23.18688
## 819 18.70411 14.55386 22.85436 12.356847 25.05137
## 820 15.44646 11.26886 19.62406 9.057373 21.83554
## 821 15.99339 11.78863 20.19816 9.562761 22.42403
## 822 16.28520 12.05344 20.51696 9.813287 22.75712
## 823 16.03407 11.77549 20.29265 9.521130 22.54700
## 824 16.07340 11.78817 20.35863 9.519701 22.62710
## 825 16.61187 12.30014 20.92359 10.017654 23.20608
## 826 16.95297 12.61492 21.29102 10.318493 23.58744
## 827 16.42190 12.01503 20.82877 9.682172 23.16163
## 828 16.28251 11.84921 20.71581 9.502366 23.06266
## 829 16.32806 11.86540 20.79071 9.503016 23.15310
## 830 17.29351 12.80169 21.78533 10.423863 24.16315
## 831 18.92327 14.40248 23.44407 12.009312 25.83723
## 832 16.00996 11.46038 20.55955 9.051970 22.96796
## 833 16.49909 11.92089 21.07728 9.497337 23.50084
## 834 16.76005 12.15342 21.36668 9.714816 23.80528
## 835 16.53546 11.90057 21.17035 9.447008 23.62391
## 836 16.57064 11.90766 21.23361 9.439228 23.70204
## 837 17.05218 12.36129 21.74308 9.878077 24.22629
## 838 17.35723 12.63858 22.07588 10.140679 24.57378
Forecasting:
forecastArea <- forecast(mod1$fitted, h = 60)
plot(forecastArea,lwd=2,col="purple", main="Forecasts from ARIMA(0,1,2)(1,0,1)[12]", xlab="Time", ylab="Number of space missions")
legend("topleft", legend=c("Past", "Future"), col=c("Purple", "Blue"), lty=1:2, cex=0.8)
Function of time
sp$M = as.factor(month(sp$month))
sp$Y = as.factor(year(sp$month))
modY=lm(spSum~sp$month, data=sp)
summary(modY)
##
## Call:
## lm(formula = spSum ~ sp$month, data = sp)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.9667 -1.9639 -0.9419 2.0423 18.0642
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.961e+00 1.768e-01 33.713 <2e-16 ***
## sp$month -1.321e-06 1.761e-05 -0.075 0.94
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.359 on 776 degrees of freedom
## Multiple R-squared: 7.243e-06, Adjusted R-squared: -0.001281
## F-statistic: 0.00562 on 1 and 776 DF, p-value: 0.9403
ResidLinear=ts(modY$residuals)
plot(ResidLinear,cex=1.5,cex.lab=1.5,cex.axis=1.5,lwd=2,col="darkblue",xlab="t",ylab="Residual",main="Residuals - Linear Fit")
abline(0,0,col="red")
points(sp$month,modY$residuals,pch=19,col="darkblue")
t = ts(sp$month)
fit = lm(spSum~ t + I(t^2) + cos(t*(2*pi/12)) + sin(t*(2*pi/12)) , na.action=NULL)
summary(fit)
##
## Call:
## lm(formula = spSum ~ t + I(t^2) + cos(t * (2 * pi/12)) + sin(t *
## (2 * pi/12)), na.action = NULL)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.9305 -2.4350 -0.4588 1.7071 17.0197
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.043e+00 1.766e-01 34.222 < 2e-16 ***
## t -1.678e-04 4.549e-05 -3.688 0.000242 ***
## I(t^2) 1.132e-08 2.857e-09 3.963 8.09e-05 ***
## cos(t * (2 * pi/12)) -2.060e-02 1.695e-01 -0.122 0.903310
## sin(t * (2 * pi/12)) 6.578e-02 1.683e-01 0.391 0.696035
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.331 on 773 degrees of freedom
## Multiple R-squared: 0.02012, Adjusted R-squared: 0.01505
## F-statistic: 3.968 on 4 and 773 DF, p-value: 0.003403
t = ts(sp$month)
fit = lm(sp$sum ~ I(t^2) + as.factor(sp$M) , na.action=NULL)
summary(fit)
##
## Call:
## lm(formula = sp$sum ~ I(t^2) + as.factor(sp$M), na.action = NULL)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.4314 -2.3861 -0.5167 1.8734 16.0159
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.209e+00 4.232e-01 9.946 < 2e-16 ***
## I(t^2) 1.597e-09 1.086e-09 1.471 0.141727
## as.factor(sp$M)2 1.184e+00 5.783e-01 2.047 0.040969 *
## as.factor(sp$M)3 1.445e+00 5.783e-01 2.498 0.012683 *
## as.factor(sp$M)4 1.921e+00 5.783e-01 3.322 0.000936 ***
## as.factor(sp$M)5 1.013e+00 5.783e-01 1.751 0.080345 .
## as.factor(sp$M)6 2.258e+00 5.783e-01 3.905 0.000103 ***
## as.factor(sp$M)7 1.473e+00 5.783e-01 2.547 0.011072 *
## as.factor(sp$M)8 1.742e+00 5.805e-01 3.000 0.002785 **
## as.factor(sp$M)9 1.663e+00 5.805e-01 2.864 0.004291 **
## as.factor(sp$M)10 1.833e+00 5.783e-01 3.170 0.001587 **
## as.factor(sp$M)11 1.248e+00 5.783e-01 2.157 0.031283 *
## as.factor(sp$M)12 3.201e+00 5.783e-01 5.535 4.28e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.297 on 765 degrees of freedom
## Multiple R-squared: 0.0503, Adjusted R-squared: 0.0354
## F-statistic: 3.377 on 12 and 765 DF, p-value: 7.945e-05
vif(fit)
## GVIF Df GVIF^(1/(2*Df))
## I(t^2) 1.00014 1 1.000070
## as.factor(sp$M) 1.00014 11 1.000006
t = ts(sp$month)
fit = lm(sp$sum ~ as.factor(sp$M) , na.action=NULL)
summary(fit)
##
## Call:
## lm(formula = sp$sum ~ as.factor(sp$M), na.action = NULL)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.5692 -2.3692 -0.5615 1.8906 16.4308
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.3692 0.4092 10.677 < 2e-16 ***
## as.factor(sp$M)2 1.1846 0.5787 2.047 0.040998 *
## as.factor(sp$M)3 1.4462 0.5787 2.499 0.012665 *
## as.factor(sp$M)4 1.9231 0.5787 3.323 0.000933 ***
## as.factor(sp$M)5 1.0154 0.5787 1.755 0.079731 .
## as.factor(sp$M)6 2.2615 0.5787 3.908 0.000101 ***
## as.factor(sp$M)7 1.4769 0.5787 2.552 0.010900 *
## as.factor(sp$M)8 1.7401 0.5810 2.995 0.002830 **
## as.factor(sp$M)9 1.6620 0.5810 2.861 0.004341 **
## as.factor(sp$M)10 1.8308 0.5787 3.164 0.001620 **
## as.factor(sp$M)11 1.2462 0.5787 2.153 0.031603 *
## as.factor(sp$M)12 3.2000 0.5787 5.530 4.41e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.299 on 766 degrees of freedom
## Multiple R-squared: 0.04762, Adjusted R-squared: 0.03394
## F-statistic: 3.482 on 11 and 766 DF, p-value: 9.177e-05